{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "trainDf = pd.read_csv('one-hot-train.csv')\n",
    "preDf = pd.read_csv('one-hot-pre.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>Attrition</th>\n",
       "      <th>BusinessTravel</th>\n",
       "      <th>DistanceFromHome</th>\n",
       "      <th>Education</th>\n",
       "      <th>EmployeeNumber</th>\n",
       "      <th>EnvironmentSatisfaction</th>\n",
       "      <th>Gender</th>\n",
       "      <th>JobInvolvement</th>\n",
       "      <th>JobLevel</th>\n",
       "      <th>...</th>\n",
       "      <th>EducationField_5</th>\n",
       "      <th>JobRole_0</th>\n",
       "      <th>JobRole_1</th>\n",
       "      <th>JobRole_2</th>\n",
       "      <th>JobRole_3</th>\n",
       "      <th>JobRole_4</th>\n",
       "      <th>JobRole_5</th>\n",
       "      <th>JobRole_6</th>\n",
       "      <th>JobRole_7</th>\n",
       "      <th>JobRole_8</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>37</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>77</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>54</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>1245</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>34</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>147</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>39</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1026</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>28</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1111</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 44 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Age  Attrition  BusinessTravel  DistanceFromHome  Education  \\\n",
       "0   37          0               2                 1          4   \n",
       "1   54          0               1                 1          4   \n",
       "2   34          1               1                 7          3   \n",
       "3   39          0               2                 1          1   \n",
       "4   28          1               1                 1          3   \n",
       "\n",
       "   EmployeeNumber  EnvironmentSatisfaction  Gender  JobInvolvement  JobLevel  \\\n",
       "0              77                        1       1               2         2   \n",
       "1            1245                        4       0               3         3   \n",
       "2             147                        1       1               1         2   \n",
       "3            1026                        4       0               2         4   \n",
       "4            1111                        1       1               2         1   \n",
       "\n",
       "     ...      EducationField_5  JobRole_0  JobRole_1  JobRole_2  JobRole_3  \\\n",
       "0    ...                     0          0          0          0          0   \n",
       "1    ...                     0          0          0          0          0   \n",
       "2    ...                     0          0          0          1          0   \n",
       "3    ...                     0          0          0          0          0   \n",
       "4    ...                     0          0          0          1          0   \n",
       "\n",
       "   JobRole_4  JobRole_5  JobRole_6  JobRole_7  JobRole_8  \n",
       "0          1          0          0          0          0  \n",
       "1          1          0          0          0          0  \n",
       "2          0          0          0          0          0  \n",
       "3          1          0          0          0          0  \n",
       "4          0          0          0          0          0  \n",
       "\n",
       "[5 rows x 44 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "trainDf.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_trainDf = trainDf['Attrition']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    0\n",
       "1    0\n",
       "2    1\n",
       "3    0\n",
       "4    1\n",
       "Name: Attrition, dtype: int64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_trainDf.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "trainDf.drop(['Attrition', 'PerformanceRating', 'PercentSalaryHike', 'NumCompaniesWorked', 'Gender', 'BusinessTravel'],axis=1,inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "x1_train, x1_test, y1_train, y1_test = train_test_split(trainDf, y_trainDf, test_size=0.2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.ensemble import RandomForestRegressor\n",
    "from sklearn.metrics import mean_squared_error"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 9 candidates, totalling 45 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
      "[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:    9.0s finished\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=10,\n",
      "           max_features='auto', max_leaf_nodes=None,\n",
      "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
      "           min_samples_leaf=1, min_samples_split=20,\n",
      "           min_weight_fraction_leaf=0.0, n_estimators=65, n_jobs=None,\n",
      "           oob_score=False, random_state=None, verbose=0, warm_start=False)\n"
     ]
    }
   ],
   "source": [
    "from sklearn.model_selection import GridSearchCV\n",
    "# Set the parameters by cross-validation\n",
    "tuned_parameters = {'n_estimators': [65,75,85], 'max_depth': [10,20], 'min_samples_split': [15,20,25]}\n",
    "\n",
    "\n",
    "# clf = ensemble.RandomForestRegressor(n_estimators=500, n_jobs=1, verbose=1)\n",
    "gridCV = GridSearchCV(RandomForestRegressor(), tuned_parameters, cv=5, n_jobs=-1, verbose=1)\n",
    "gridCV.fit(x1_train, y1_train)\n",
    "print (gridCV.best_estimator_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " R^2 (train) = 0.570453, R^2 (valid) = 0.183908, RMSE (train) = 0.241782, RMSE (valid) = 0.330420\n"
     ]
    }
   ],
   "source": [
    "reg = gridCV.best_estimator_\n",
    "training_accuracy = reg.score(x1_train, y1_train)\n",
    "valid_accuracy = reg.score(x1_test, y1_test)\n",
    "rmsetrain = np.sqrt(mean_squared_error(reg.predict(x1_train),y1_train))\n",
    "rmsevalid = np.sqrt(mean_squared_error(reg.predict(x1_test),y1_test))\n",
    "print (\" R^2 (train) = %0.6f, R^2 (valid) = %0.6f, RMSE (train) = %0.6f, RMSE (valid) = %0.6f\" % (training_accuracy, valid_accuracy, rmsetrain, rmsevalid))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['random_forest_model0241555.pkl']"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.externals import joblib\n",
    "joblib.dump(reg, 'random_forest_model0241555.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_file = 'random_forest_model0241555.pkl'\n",
    "regressor = joblib.load(model_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyError",
     "evalue": "\"['PerformanceRating' 'PercentSalaryHike' 'NumCompaniesWorked' 'Gender'\\n 'BusinessTravel'] not found in axis\"",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-25-a04544e4fcff>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mpreDf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'PerformanceRating'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'PercentSalaryHike'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'NumCompaniesWorked'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'Gender'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'BusinessTravel'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0minplace\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32mD:\\Anaconda\\envs\\Pytorch-CUDA10.0\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36mdrop\u001b[1;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[0;32m   3695\u001b[0m                                            \u001b[0mindex\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3696\u001b[0m                                            \u001b[0mlevel\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mlevel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minplace\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0minplace\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3697\u001b[1;33m                                            errors=errors)\n\u001b[0m\u001b[0;32m   3698\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3699\u001b[0m     @rewrite_axis_style_signature('mapper', [('copy', True),\n",
      "\u001b[1;32mD:\\Anaconda\\envs\\Pytorch-CUDA10.0\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36mdrop\u001b[1;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[0;32m   3109\u001b[0m         \u001b[1;32mfor\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlabels\u001b[0m \u001b[1;32min\u001b[0m \u001b[0maxes\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3110\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mlabels\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3111\u001b[1;33m                 \u001b[0mobj\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_drop_axis\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mlevel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   3112\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3113\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0minplace\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\Anaconda\\envs\\Pytorch-CUDA10.0\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36m_drop_axis\u001b[1;34m(self, labels, axis, level, errors)\u001b[0m\n\u001b[0;32m   3141\u001b[0m                 \u001b[0mnew_axis\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mlevel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3142\u001b[0m             \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3143\u001b[1;33m                 \u001b[0mnew_axis\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   3144\u001b[0m             \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreindex\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m**\u001b[0m\u001b[1;33m{\u001b[0m\u001b[0maxis_name\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mnew_axis\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3145\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\Anaconda\\envs\\Pytorch-CUDA10.0\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36mdrop\u001b[1;34m(self, labels, errors)\u001b[0m\n\u001b[0;32m   4402\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0merrors\u001b[0m \u001b[1;33m!=\u001b[0m \u001b[1;34m'ignore'\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   4403\u001b[0m                 raise KeyError(\n\u001b[1;32m-> 4404\u001b[1;33m                     '{} not found in axis'.format(labels[mask]))\n\u001b[0m\u001b[0;32m   4405\u001b[0m             \u001b[0mindexer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m~\u001b[0m\u001b[0mmask\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   4406\u001b[0m         \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdelete\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mKeyError\u001b[0m: \"['PerformanceRating' 'PercentSalaryHike' 'NumCompaniesWorked' 'Gender'\\n 'BusinessTravel'] not found in axis\""
     ]
    }
   ],
   "source": [
    "preDf.drop(['PerformanceRating', 'PercentSalaryHike', 'NumCompaniesWorked', 'Gender', 'BusinessTravel'],axis=1,inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "prediction = regressor.predict(preDf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.11014671, 0.10061656, 0.11347277, 0.06137468, 0.21146893,\n",
       "       0.07351433, 0.01577124, 0.11846133, 0.11610212, 0.09940218,\n",
       "       0.10873179, 0.44882024, 0.57217633, 0.13265259, 0.35759533,\n",
       "       0.07458546, 0.20382137, 0.0752331 , 0.40609394, 0.20980392,\n",
       "       0.6088611 , 0.39097407, 0.10249575, 0.10261188, 0.22282142,\n",
       "       0.23065452, 0.20209671, 0.06687857, 0.19802291, 0.31417064,\n",
       "       0.19042464, 0.21147726, 0.10212421, 0.10909885, 0.09342909,\n",
       "       0.26536798, 0.63987673, 0.25569985, 0.04884936, 0.31819809,\n",
       "       0.08754499, 0.36469325, 0.11463047, 0.10746648, 0.017675  ,\n",
       "       0.2791614 , 0.29471999, 0.14227265, 0.16587269, 0.11363871,\n",
       "       0.48304722, 0.28715184, 0.12965669, 0.14484891, 0.38195139,\n",
       "       0.29665629, 0.42763894, 0.06796273, 0.11843347, 0.13975719,\n",
       "       0.69003616, 0.26688896, 0.12775613, 0.20593823, 0.20604776,\n",
       "       0.35237531, 0.12220143, 0.79638388, 0.30325215, 0.19511162,\n",
       "       0.03732177, 0.22305171, 0.01192487, 0.11436881, 0.15513963,\n",
       "       0.07929697, 0.1381332 , 0.28171211, 0.21115947, 0.49231547,\n",
       "       0.12675403, 0.30927836, 0.27714008, 0.11857331, 0.77669441,\n",
       "       0.35592004, 0.15640611, 0.08720209, 0.06784157, 0.19168086,\n",
       "       0.47557914, 0.11950098, 0.18419844, 0.30436208, 0.44027585,\n",
       "       0.05168927, 0.26090512, 0.19808987, 0.18306736, 0.17267887,\n",
       "       0.07507441, 0.14708817, 0.15725265, 0.63175706, 0.0519929 ,\n",
       "       0.13127294, 0.19715455, 0.08934535, 0.15348236, 0.13018491,\n",
       "       0.10805234, 0.3896399 , 0.27910677, 0.17135714, 0.13032598,\n",
       "       0.11802484, 0.22984939, 0.73802867, 0.06118118, 0.50027022,\n",
       "       0.16056168, 0.1704783 , 0.35610962, 0.24891562, 0.9052846 ,\n",
       "       0.02361527, 0.17945938, 0.20958254, 0.40747877, 0.32644925,\n",
       "       0.25309177, 0.6233923 , 0.09153485, 0.15076148, 0.28645013,\n",
       "       0.21911296, 0.10769311, 0.00781534, 0.11050715, 0.43261008,\n",
       "       0.17438425, 0.14644728, 0.18067547, 0.19286976, 0.23282082,\n",
       "       0.07387283, 0.25030576, 0.15195138, 0.2960382 , 0.23282605,\n",
       "       0.45502884, 0.05051276, 0.17623424, 0.14288486, 0.17594781,\n",
       "       0.03629192, 0.11662036, 0.40780252, 0.36780904, 0.14283491,\n",
       "       0.07993486, 0.18675414, 0.1620753 , 0.26127577, 0.5039093 ,\n",
       "       0.09646465, 0.62193228, 0.18524621, 0.18230587, 0.13480291,\n",
       "       0.42569197, 0.4499318 , 0.20807494, 0.16059395, 0.21883083,\n",
       "       0.08025536, 0.13056646, 0.7462637 , 0.05886039, 0.1411441 ,\n",
       "       0.14811685, 0.07703276, 0.17674163, 0.52107841, 0.43596563,\n",
       "       0.10328824, 0.33691412, 0.11025214, 0.33765267, 0.11348618,\n",
       "       0.22426396, 0.16317584, 0.14008209, 0.18392078, 0.33112245,\n",
       "       0.09634865, 0.11047135, 0.09699554, 0.13293215, 0.15082321,\n",
       "       0.10761745, 0.0971648 , 0.10309998, 0.13422684, 0.20564114,\n",
       "       0.04674372, 0.22198579, 0.17511297, 0.25427182, 0.28321528,\n",
       "       0.37003708, 0.18853669, 0.05690849, 0.08681477, 0.26275042,\n",
       "       0.13921721, 0.17264776, 0.16172581, 0.09836867, 0.1992931 ,\n",
       "       0.42955942, 0.26216168, 0.19102141, 0.33468025, 0.17546988,\n",
       "       0.42685617, 0.15166713, 0.20726175, 0.23914904, 0.46359876,\n",
       "       0.16681134, 0.21117357, 0.08957   , 0.25698417, 0.11829008,\n",
       "       0.23691221, 0.08377364, 0.07003363, 0.74125254, 0.25201647,\n",
       "       0.09672489, 0.09020815, 0.09874345, 0.11405882, 0.28668927,\n",
       "       0.16504857, 0.18550539, 0.64507983, 0.07556018, 0.05912855,\n",
       "       0.1718484 , 0.14468424, 0.25405709, 0.37821443, 0.09147301,\n",
       "       0.23048596, 0.41261185, 0.17363728, 0.2855216 , 0.31173171,\n",
       "       0.15407519, 0.41265637, 0.03559193, 0.15605264, 0.38952198,\n",
       "       0.04028745, 0.14964554, 0.15016622, 0.61619873, 0.14353028,\n",
       "       0.10774724, 0.08283749, 0.39574076, 0.17952498, 0.12028278,\n",
       "       0.4971546 , 0.08516149, 0.36496222, 0.17386198, 0.10147914,\n",
       "       0.1597339 , 0.44143359, 0.12770498, 0.10338385, 0.28415826,\n",
       "       0.13536831, 0.32555052, 0.16956804, 0.09328703, 0.16885968,\n",
       "       0.20853573, 0.20275952, 0.08932406, 0.17356819, 0.13840132,\n",
       "       0.30948945, 0.36809814, 0.09884162, 0.30899396, 0.60115137,\n",
       "       0.10279129, 0.07741377, 0.12256884, 0.14973555, 0.29630735,\n",
       "       0.15474834, 0.11859112, 0.3048089 , 0.15547838, 0.40424613,\n",
       "       0.08900689, 0.24207516, 0.00422573, 0.00715546, 0.10589743,\n",
       "       0.17367166, 0.05748351, 0.13017126, 0.11319731, 0.17661638,\n",
       "       0.42205918, 0.32838601, 0.17021455, 0.32546558, 0.01087635,\n",
       "       0.1113631 , 0.11392371, 0.11299601, 0.39431722, 0.01515693,\n",
       "       0.15289496, 0.19483407, 0.44682591, 0.04807808, 0.10172473,\n",
       "       0.06452458, 0.14464632, 0.60999906, 0.09547711, 0.34605776,\n",
       "       0.51467149, 0.38592829, 0.1561068 , 0.20854444, 0.21607997,\n",
       "       0.27535146, 0.16193171, 0.15701572, 0.32264756, 0.09316287])"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in range(len(prediction)):\n",
    "    if prediction[i] >= 0.5:\n",
    "        prediction[i] = 1\n",
    "    else:\n",
    "        prediction[i] = 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [1.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.],\n",
       "       [0.]])"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prediction.reshape(-1,1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "out = pd.DataFrame(prediction,dtype='int64')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   0\n",
       "0  0\n",
       "1  0\n",
       "2  0\n",
       "3  0\n",
       "4  0"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "out.columns=['result']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "out.to_csv('resultRandomForest00241555.csv',index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
